# NOTE(review): "---" looks like a redacted placeholder for the project
# directory - confirm and replace before running. The read.csv() calls below
# use relative file names, so they are resolved against this directory.
setwd("---")

library(glmnet)
library(edgebundleR)
library(RBGL)
library(graph)
library(gplots)
library(igraph)
library(huge)
library(lattice)
library(qgraph)
library(dplyr)
library(tidyverse)
library(viridis)

# Load the `$`-delimited EBGM table. Its var1 / var2 columns are later renamed
# to 'actsub' / 'hlt', and it also carries the ebgm and QUANT_05 columns used
# by the filters further down.
orignial_df_1 <- read.csv("hypts_EBGM_hlt.csv", header = TRUE, sep = "$")
head(orignial_df_1)
length(unique(orignial_df_1$var1))
length(unique(orignial_df_1$var2))

# Load the `$`-delimited MedDRA table (hlt and soc columns are used here).
pulm_ade_df <- read.csv("pulm_meddra_hypts.csv", header = TRUE, sep = "$")

# Upper-cased unique HLT terms, minus the entries at positions 1, 8, 17, 18
# and 26 of that unique list, sorted alphabetically.
# NOTE(review): the excluded positions are hard-coded and depend on the row
# order of the input file - confirm they still point at the intended terms.
pulm_ade <- sort(toupper(unique(pulm_ade_df$hlt))[-c(1, 8, 17, 18, 26)])

# One (hlt, soc) row per retained HLT term: first occurrence wins.
pulm_ade_hlt_soc <- pulm_ade_df %>%
  filter(toupper(hlt) %in% pulm_ade) %>%
  distinct(hlt, .keep_all = TRUE) %>%
  select(hlt, soc)

# Wide EBGM table: one row per HLT term (var2), one column per drug (var1).
#   1. keep rows whose HLT is in pulm_ade and whose QUANT_05 exceeds 1;
#   2. keep drugs (var1) that still have more than one such row;
#   3. spread to wide form and fill missing drug/HLT cells with 0;
#   4. convert factor columns to their numeric codes, then zero every value
#      that is <= 1.
# NOTE(review): step 4 is applied to all columns, including the var2 label
# column; only the drug columns are used below (column 1 is dropped).
p_df_EBGM <- orignial_df_1 %>%
  filter(var2 %in% pulm_ade, QUANT_05 > 1) %>%
  group_by(var1) %>%
  filter(n() > 1) %>%
  select(var1, var2, ebgm) %>%
  spread(var1, ebgm) %>%
  replace(is.na(.), 0) %>%
  mutate_if(is.factor, as.numeric) %>%
  mutate_all(~ if_else(.x <= 1, 0, .x))

# Numeric matrix of the drug columns only (first column, var2, removed).
p_df_1 <- as.matrix(p_df_EBGM[, -1])
# write.table(p_df_EBGM, 'hypt_EB_Friedman.csv', sep = '$')

### adding drug class info
# Drug names are the column names of the EBGM matrix built above. The script
# previously read them from `p_df_2`, an object that is never created, so a
# fresh run stopped here with "object 'p_df_2' not found".
drug_act_name <- colnames(p_df_1)
length(drug_act_name)
# write.table(drug_act_name, 'hypts_134_drugs.csv', sep = '$')

### atc class
# Drug -> ATC class lookup, read as a comma-separated file; its actsub and atc
# columns are used further down.
# NOTE(review): the commented-out export above writes this file name with
# sep = '$', while it is read here with the default ',' - presumably the file
# was edited by hand to add the class column; confirm the delimiter.
atc_class <- read.csv(file = "hypts_134_drugs.csv", header = TRUE)

### get data for friedman test
# Long-format (drug, HLT, EBGM) rows: HLT in pulm_ade and QUANT_05 > 1, and
# only drugs that keep more than one such row. Remaining NAs are set to 0.
df_friedman <- orignial_df_1 %>%
  filter(var2 %in% pulm_ade, QUANT_05 > 1) %>%
  group_by(var1) %>%
  filter(n() > 1) %>%
  select(var1, var2, ebgm) %>%
  replace(is.na(.), 0)
names(df_friedman) <- c('actsub', 'hlt', 'ebgm')

# Attach the ATC class information (natural join on the shared column names).
df_friedman_1 <- df_friedman %>%
  left_join(atc_class)
# write.table(df_friedman_1, 'hypt_EB_Friedman_df.csv', sep = '$')

# Distinct drugs that survived the filters above.
drugs_44 <- unique(df_friedman$actsub)
# write.table(drugs_44, '44_drugs_from_hlt.csv', sep = '$')

### get data for friedman test - round 2 all the ebgm for pulmonary for 44 drugs
# Same long format as df_friedman, but without the QUANT_05 cut-off: every
# pulm_ade HLT row of the drugs listed in drugs_44 is kept, provided the drug
# has more than one row. Remaining NAs are set to 0.
df_friedman_all <- orignial_df_1 %>%
  filter(var2 %in% pulm_ade, var1 %in% drugs_44) %>%
  group_by(var1) %>%
  filter(n() > 1) %>%
  select(var1, var2, ebgm) %>%
  replace(is.na(.), 0)
names(df_friedman_all) <- c('actsub', 'hlt', 'ebgm')

### hlt number before GLASSO
length(unique(df_friedman_all$hlt))

# Attach the ATC class information (natural join on the shared column names).
df_friedman_2 <- df_friedman_all %>%
  left_join(atc_class)
# write.table(df_friedman_2, 'hypt_Friedman_df_all.csv', sep = '$')

###
### pulm hlt
# Graphical lasso (huge, method = "glasso") on the correlation matrix of the
# drug columns of p_df_1, fitted at the single penalty lambda = 0.75.
out.huge <- huge(cor(p_df_1), method = "glasso", lambda = 0.75,
                 verbose = FALSE, cov.output = TRUE)
# identify the linkages: adjacency matrix of the only fit on the path
adj.mat <- as.matrix(out.huge$path[[1]])

# format the colnames as "<ATC class>.<drug>"
# match() looks the class up in matrix column order. Subsetting atc_class with
# `%in%` returned the classes in atc_class row order instead, which attaches
# the wrong class to a drug whenever the two orders differ (and recycles when
# the lengths differ).
drug_cols <- colnames(p_df_1)
atc_row <- match(drug_cols, atc_class$actsub)
if (anyNA(atc_row)) {
  stop("No ATC class entry for: ",
       paste(drug_cols[is.na(atc_row)], collapse = ", "),
       call. = FALSE)
}
nodenames <- paste(as.character(atc_class$atc[atc_row]), drug_cols, sep = ".")
colnames(adj.mat) <- rownames(adj.mat) <- nodenames

# restrict attention to the connected nodes; drop = FALSE keeps a matrix even
# if a single row/column is left
adj.mat <- adj.mat[rowSums(adj.mat) > 0, colSums(adj.mat) > 0, drop = FALSE]
# number of connected nodes
length(colnames(adj.mat))
# The matrix is intentionally not reordered with `labs` here: `labs` is only
# defined in the Cuthill-McKee section further down, so indexing with it at
# this point failed on a fresh run. This plot keeps the original node order.

# plot the result
y <- graph_from_adjacency_matrix(adj.mat, mode = "directed")
# class label = everything before the first "." of the node name
node_group <- gsub("\\..*", "", colnames(adj.mat))

# One rainbow colour per class, assigned to the classes in random order.
# No seed is set, so the class -> colour mapping changes between runs.
clr <- as.factor(node_group)
n <- length(unique(clr))
levels(clr) <- rainbow(n, s = 1, v = 0.9, start = 0,
                       end = max(1, n - 1) / n, alpha = 0.95)
levels(clr) <- levels(clr)[sample(seq_len(n))]

V(y)$color <- as.character(clr)
# Edge colours need one value per edge, not one per vertex: colour each edge
# like the vertex it starts from (first column of ends()).
E(y)$color <- V(y)$color[ends(y, E(y), names = FALSE)[, 1]]

### using drug classes
# plot.igraph(y) 
edgebundle(y, tension=0.9, fontsize = 20, padding = 250, width = 1000)

### for cuthill mckee
### get the color and node dataframe, rename the nodes
# Colour of every vertex of the class-coloured graph `y` built above, keyed by
# the node name with everything up to the first "." removed, so it can be
# matched against the plain column names used below.
node_color <- data.frame(
  color = as.character(V(y)$color),
  name = sub(".*?\\.", "", V(y)$name),
  stringsAsFactors = FALSE
)

# Same glasso fit as in the section above, this time with the plain column
# names of p_df_1 as node names.
out.huge <- huge(cor(p_df_1), method = "glasso", lambda = 0.75,
                 verbose = FALSE, cov.output = TRUE)
adj.mat <- as.matrix(out.huge$path[[1]])

nodenames <- colnames(p_df_1)
colnames(adj.mat) <- rownames(adj.mat) <- nodenames
# restrict attention to the connected nodes; drop = FALSE keeps a matrix even
# if a single row/column is left
adj.mat <- adj.mat[rowSums(adj.mat) > 0, colSums(adj.mat) > 0, drop = FALSE]

### cuthill mckee order
# Hard-coded permutation of the connected nodes.
# NOTE(review): presumably computed offline for one particular data set; it is
# only valid while exactly 22 nodes stay connected, hence the check below
# (a longer matrix would otherwise silently lose nodes).
labs <- c(18,1,22,3,2,13,10,5,16,4,11,6,21,20,19,15,12,8,14,7,17,9)
if (length(labs) != nrow(adj.mat) ||
    !setequal(labs, seq_len(nrow(adj.mat)))) {
  stop("`labs` must be a permutation of the ", nrow(adj.mat),
       " connected nodes, but has ", length(labs), " entries",
       call. = FALSE)
}
adj.mat <- adj.mat[labs, labs, drop = FALSE]

y <- graph_from_adjacency_matrix(adj.mat, mode = "directed")

# Bring the colours into the vertex order of the reordered graph.
node_color <- node_color %>%
  slice(match(V(y)$name, name))

V(y)$color <- as.character(node_color$color)
# Edge colours need one value per edge, not one per vertex: colour each edge
# like the vertex it starts from (first column of ends()).
E(y)$color <- V(y)$color[ends(y, E(y), names = FALSE)[, 1]]

### using cuthill mckee adjusted order
# plot.igraph(y)
edgebundle(y, tension=0.9, fontsize = 20, padding = 250, width = 1000)

